#!/usr/bin/env pwsh

<#
.SYNOPSIS
    This script scans Azure Storage accounts for blobs with malware scanning tags and removes them from non-malicious versioned blobs.

.DESCRIPTION
    This script connects to an Azure subscription, scans specified or all storage accounts, identifies versioned blobs with malware scanning tags, and removes those tags if the blobs are not malicious.
    It will not delete tags from latest blobs. For that use the script RemoveMalwareScanningIndexTags.ps1.

.PARAMETER SubscriptionId
    The ID of the Azure subscription to scan.

.PARAMETER StorageAccountName
    (Optional) The name of a specific storage account to scan.

.PARAMETER ResourceGroupName
    (Optional) The name of the resource group containing the specified storage account. Required if you are specifying a specific storage account.

.PARAMETER DaysThreshold
    (Optional) Look-back window in days. Only blob versions whose "Malware Scanning scan time UTC" tag is newer than this many days ago are processed. Default is 7 days.

.NOTES
    Author: Eitan Shteinberg, Varun Garg
    Version: 1.0
    Date: 2024-06-02

# Prerequisites:
# 1. PowerShell 7
# 2. Azure PowerShell modules:
#    - Az.Accounts
#    - Az.Storage
# 3. Appropriate permissions to access and manage the specified storage accounts:
#    - Owner or Storage Blob Data Contributor role on the storage account.
# 4. The script must be run in a context that has the necessary Azure permissions.
# 5. Tags "Malware Scanning scan time UTC" and "Malware Scanning scan result" must be present on the blobs.
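# 6. For storage accounts that disallow shared-key access, the script falls back to AAD authentication using the Azure PowerShell session it opens with Connect-AzAccount (device-code sign-in), not an Azure CLI 'az login' session.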
# 7. The script should be executed in a PowerShell environment that supports Azure modules:
#    - Azure Cloud Shell
#    - Local environment with Azure PowerShell modules installed
# 8. Execution policy should allow running scripts (e.g., RemoteSigned).

# Example:
# To run this script, you can use the following command to scan a specific storage account:
# .\RemoveMalwareScanningIndexTags_Vrsions.ps1 -SubscriptionId "your-subscription-id" -ResourceGroupName "your-resource-group" -StorageAccountName "your-storage-account-name" -DaysThreshold 7
# or scan an entire subscription:
# .\RemoveMalwareScanningIndexTags_Vrsions.ps1 -SubscriptionId "your-subscription-id" -DaysThreshold 7
#>

param(
    # Required: ID of the subscription whose storage accounts are scanned.
    [Parameter(Mandatory = $true)]
    [string] $SubscriptionId,

    # Optional: restrict the scan to this single storage account.
    [Parameter()]
    [string] $StorageAccountName,

    # Optional: resource group of -StorageAccountName; the script asks for it
    # whenever a storage account name is supplied.
    [Parameter()]
    [string] $ResourceGroupName,

    # Optional: look-back window in days used to build the cutoff date.
    [Parameter()]
    [int] $DaysThreshold = 7
)

# Relax the execution policy for the current process only.
# Execution policies are a Windows-only feature: on Linux/macOS (including
# Azure Cloud Shell) Set-ExecutionPolicy is not supported, so the call is
# skipped there. $IsWindows does not exist in Windows PowerShell 5.1, hence
# the additional check for the 'Desktop' edition.
if ($PSVersionTable.PSEdition -eq 'Desktop' -or $IsWindows) {
    Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope Process -Force
}

# Index-tag keys this script looks for and removes.
$tagScanTimeUTC = "Malware Scanning scan time UTC"
$tagScanResult = "Malware Scanning scan result"

# Cutoff date: blob versions whose scan-time tag sorts after this date are
# processed. The tag is named "... UTC", so the cutoff is derived from the
# current UTC time rather than local time, and it is formatted with the
# invariant culture so the result is always a Gregorian yyyy-MM-dd string
# regardless of the machine's regional settings.
# NOTE(review): the later string comparison assumes the tag value starts with
# a sortable yyyy-MM-dd date - confirm against real tag values.
$now = (Get-Date).ToUniversalTime()
$cutoffDate = $now.AddDays(-$DaysThreshold).ToString("yyyy-MM-dd", [System.Globalization.CultureInfo]::InvariantCulture)
$filterExpressionTemplate = """$tagScanTimeUTC"" > '$cutoffDate'"

# Maximum number of blobs requested per listing call.
$batchSize = 10000

# Counters for the final report.
# ($totalBlobs is initialized here but is not printed by the summary.)
$totalBlobs = 0
$identifiedBlobs = 0
$removedTagsCount = 0
$maliciousBlobsCount = 0
$scannedStorageAccounts = 0
$skippedStorageAccounts = 0
$skippedStorageAccountNames = @()

# Authenticate to Azure using device authentication.
# -ErrorAction Stop turns a failed sign-in into a terminating error so the
# script halts instead of continuing without a valid session.
Connect-AzAccount -UseDeviceAuthentication -ErrorAction Stop

# Set context to the specified subscription.
# This must not fail silently: if the subscription cannot be selected, the
# rest of the script would run against whatever context was already active
# and could modify tags in the wrong subscription.
Write-Host "Setting context to subscription '$SubscriptionId'"
Set-AzContext -SubscriptionId $SubscriptionId -ErrorAction Stop

# Build the list of storage accounts to process:
#   - no -StorageAccountName      -> every storage account in the subscription
#   - name without resource group -> print a hint and stop the script
#   - name and resource group     -> that single storage account
if (-not $StorageAccountName) {
    Write-Host "Scanning all storage accounts in the subscription"
    $storageAccounts = Get-AzStorageAccount
} elseif (-not $ResourceGroupName) {
    Write-Host "Please provide the resource group name for the specified storage account."
    return
} else {
    Write-Host "Scanning storage account '$StorageAccountName' in resource group '$ResourceGroupName'"
    $storageAccounts = Get-AzStorageAccount -ResourceGroupName $ResourceGroupName -Name $StorageAccountName
}

# Main loop: for every selected storage account, build a storage context,
# list its containers, page through all blob versions (with tags), and strip
# the two malware-scanning index tags from non-latest versions whose scan-time
# tag is newer than the cutoff - unless the scan result is "Malicious".
#
# Every Az cmdlet inside a try block is called with -ErrorAction Stop: without
# it the cmdlets report failures as non-terminating errors, the catch blocks
# never run, and failures would be counted as successes.
foreach ($storageAccount in $storageAccounts) {
    $accountName = $storageAccount.StorageAccountName

    Write-Host ""
    Write-Host "Scanning storage account '$accountName'"

    # Shared-key access counts as disabled only when the property is
    # explicitly $false; an unset value falls through to key-based access.
    $useAadAuth = $false
    if ($storageAccount.AllowSharedKeyAccess -eq $false) {
        Write-Host "`tKey-based authentication is not permitted on storage account '$accountName'. Trying AAD authentication."
        $useAadAuth = $true
    }

    # Build the storage context, either from the signed-in account (AAD) or
    # from the first account key.
    $context = $null
    if ($useAadAuth) {
        try {
            $context = New-AzStorageContext -StorageAccountName $accountName -UseConnectedAccount -ErrorAction Stop
            Write-Host "`tCreated storage context using AAD authentication for storage account '$accountName'"
        } catch {
            Write-Host "`tError creating storage context using AAD authentication for storage account '$accountName'. Skipping this account."
            $skippedStorageAccounts++
            $skippedStorageAccountNames += $accountName
            continue
        }
    } else {
        try {
            # @() guarantees an array, so .Count and [0] behave the same for
            # zero, one, or many returned keys.
            $storageAccountKeys = @(Get-AzStorageAccountKey -ResourceGroupName $storageAccount.ResourceGroupName -Name $accountName -ErrorAction Stop)
            if ($storageAccountKeys.Count -eq 0) {
                Write-Host "`tNo storage account keys found for storage account '$accountName'. Skipping this account."
                $skippedStorageAccounts++
                $skippedStorageAccountNames += $accountName
                continue
            }
            $storageAccountKey = $storageAccountKeys[0].Value
            Write-Host "`tCreating storage context using account keys for storage account '$accountName'"
            $context = New-AzStorageContext -StorageAccountName $accountName -StorageAccountKey $storageAccountKey -ErrorAction Stop
        } catch {
            $errorMessage = $_.Exception.Message
            Write-Host "error --  '$errorMessage'"
            if ($errorMessage -like "*deny assignment*") {
                Write-Host "`tAccess denied due to deny assignment for storage account '$accountName'. Skipping this account."
            } else {
                Write-Host "`tError creating storage context using account keys for storage account '$accountName'. Skipping this account."
            }
            $skippedStorageAccounts++
            $skippedStorageAccountNames += $accountName
            continue
        }
    }

    # Attempt to get containers in the storage account
    try {
        $containers = @(Get-AzStorageContainer -Context $context -ErrorAction Stop)
        Write-Host "`t`tRetrieved $($containers.Count) containers in the storage account '$accountName'"
    } catch {
        Write-Host "`t`tSkipping storage account '$accountName' due to access issues."
        $skippedStorageAccounts++
        $skippedStorageAccountNames += $accountName
        continue
    }

    foreach ($container in $containers) {
        $containerName = $container.Name

        # Page through the container; $token carries the continuation token
        # taken from the last blob of the previous page.
        $token = $null
        do {
            Write-Host "`t`t`tFetching blobs from container '$containerName' in storage account '$accountName'"
            try {
                $blobs = @(Get-AzStorageBlob -Container $containerName -Context $context -IncludeVersion -IncludeTag -MaxCount $batchSize -ContinuationToken $token -ErrorAction Stop)

                if ($blobs.Count -eq 0) {
                    break
                }

                # Eligible = not the latest version, carries the scan-time
                # tag, and that tag's value sorts after the cutoff date.
                $eligibleBlobs = @($blobs | Where-Object {
                    (-not $_.IsLatestVersion) -and
                    ($null -ne $_.Tags) -and
                    $_.Tags.ContainsKey($tagScanTimeUTC) -and
                    ($_.Tags[$tagScanTimeUTC] -gt $cutoffDate)
                })

                # Update the continuation token for the next batch
                $token = $blobs[-1].ContinuationToken

                Write-Host "`t`t`t`tRetrieved $($blobs.Count) out of which $($eligibleBlobs.Count) are eligible"
            } catch {
                Write-Host "`t`t`t`tError accessing blobs in container '$containerName' of storage account '$accountName'. Skipping this container."
                break
            }

            # Process each blob in the current batch
            foreach ($blob in $eligibleBlobs) {
                $identifiedBlobs++
                Write-Host "`t`t`t`t`tProcessing blob '$($blob.Name), Version: $($blob.VersionId)' in container '$containerName'"

                # Tags were already fetched by -IncludeTag; this is a plain
                # property read.
                $tags = $blob.Tags
                Write-Host "`t`t`t`t`t`tRetrieved tags for blob '$($blob.Name), Version: $($blob.VersionId)'"

                if ($tags[$tagScanResult] -eq "Malicious") {
                    $maliciousBlobsCount++
                    Write-Host "`t`t`t`t`tThe blob '$($blob.Name), Version: $($blob.VersionId)' is malicious. Keeping the tags."
                    continue
                }

                # Drop only the two malware-scanning tags; any other index
                # tags on the version are written back unchanged.
                $tagRemoved = $false
                foreach ($tagKey in @($tagScanResult, $tagScanTimeUTC)) {
                    if ($tags.ContainsKey($tagKey)) {
                        $tags.Remove($tagKey)
                        $tagRemoved = $true
                    }
                }

                if ($tagRemoved) {
                    try {
                        Set-AzStorageBlobTag -Context $context -BlobBaseClient $blob.BlobBaseClient -Tag $tags -ErrorAction Stop
                        # Reached only when the update succeeded.
                        $removedTagsCount++
                        Write-Host "`t`t`t`t`t`tRemoved index tags from blob '$($blob.Name), Version: $($blob.VersionId)'"
                    } catch {
                        Write-Host "`t`t`t`t`t`tError setting tags for blob '$($blob.Name)', Version: $($blob.VersionId). Skipping this blob."
                    }
                }
            }

            Write-Host ""

        } while ($null -ne $token)
    }
    $scannedStorageAccounts++
}

# Summary report: collect the report lines first, then print them in order.
$summaryLines = @(
    ""
    "Summary:"
    "Total storage accounts scanned: $scannedStorageAccounts"
)

# The skipped-account section has two shapes: a count plus the names when
# anything was skipped, otherwise a single "None" line.
if ($skippedStorageAccounts -gt 0) {
    $summaryLines += "Storage accounts not scanned due to issues: $skippedStorageAccounts"
    $summaryLines += "Skipped storage accounts: $($skippedStorageAccountNames -join ', ')"
} else {
    $summaryLines += "Skipped storage accounts: None"
}

$summaryLines += "Total blobs identified for processing: $identifiedBlobs"
$summaryLines += "Blobs with removed tags: $removedTagsCount"
$summaryLines += "Malicious blobs found (tags kept): $maliciousBlobsCount"
$summaryLines += ""

foreach ($summaryLine in $summaryLines) {
    Write-Host $summaryLine
}
